#!/usr/bin/python
# Filename: tag_count.py
# Solution for prob5, question1
import math;

# Compute trigram log-probabilities from an n-gram counts file.
#
# Input  ('ner_rep.counts'): space-separated lines of the form
#     <count> 3-GRAM <t1> <t2> <t3>
#     <count> 2-GRAM <t1> <t2>
# Lines with any other type marker in the second field are ignored.
#
# Output ('trigram_prob.dat2'): one line per trigram,
#     <t1> <t2> <t3> <ln(count(t1,t2,t3) / count(t1,t2))>
#
# Raises KeyError if a trigram's leading bigram (t1, t2) has no 2-GRAM entry.
gram_3_count = dict()
gram_2_count = dict()

# `open` instead of the Python 2-only `file` builtin; `with` guarantees the
# handle is closed even if a malformed line raises part-way through.
with open('ner_rep.counts') as fr:
    for line in fr:
        words = line.split()
        if len(words) < 2:
            # Blank (or otherwise too short) line: nothing to classify.
            continue
        if words[1] == '3-GRAM':
            # Convert the count once here rather than on every later use.
            gram_3_count[(words[2], words[3], words[4])] = float(words[0])
        elif words[1] == '2-GRAM':
            gram_2_count[(words[2], words[3])] = float(words[0])

with open('trigram_prob.dat2', 'w') as fw:
    for key, tri_count in gram_3_count.items():
        # Conditional relative frequency of the trigram given its first two
        # tokens, stored as a natural logarithm.
        freq = tri_count / gram_2_count[(key[0], key[1])]
        fw.write(' '.join(key) + ' ' + str(math.log(freq)) + '\n')
		
		
